syntax = "proto3";

package tensorflow;

import "tensorflow/core/framework/allocation_description.proto";
import "tensorflow/core/framework/tensor_description.proto";

// File-level code generation options. They influence the generated sources
// for each target language; they do not change the wire format.

// C++: enable arena allocation support for the generated message classes.
option cc_enable_arenas = true;
// Java: name of the generated outer wrapper class for this file.
option java_outer_classname = "StepStatsProtos";
// Java: emit each top-level message as its own .java file instead of nesting
// everything inside the outer class.
option java_multiple_files = true;
// Java: package for the generated classes.
option java_package = "org.tensorflow.framework";
// Go: import path of the generated Go package.
option go_package = "github.com/tensorflow/tensorflow/tensorflow/go/core/framework/step_stats_go_proto";

// An allocation/de-allocation operation performed by the allocator.
// AllocatorMemoryUsed.allocation_records holds a list of these.
message AllocationRecord {
  // The timestamp of the operation.
  // NOTE(review): presumably microseconds, going by the `_micros` suffix; the
  // clock/epoch is not stated in this file -- confirm with the producer.
  int64 alloc_micros = 1;
  // Number of bytes allocated, or de-allocated if negative.
  int64 alloc_bytes = 2;
}

// Memory usage figures reported for one named allocator. NodeExecStats carries
// a repeated list of these in its `memory` field.
message AllocatorMemoryUsed {
  // Name of the allocator the figures below refer to.
  string allocator_name = 1;
  // These are per-node allocator memory stats.
  // NOTE(review): total_bytes / peak_bytes look like the total and the peak
  // number of bytes allocated for the node; the exact definitions are not
  // given here -- confirm with the code that fills them in.
  int64 total_bytes = 2;
  int64 peak_bytes = 3;
  // The bytes that are not deallocated.
  int64 live_bytes = 4;
  // The allocation and deallocation timeline.
  // Declared ahead of tag 5 below; only the tag number matters on the wire,
  // so the out-of-order declaration is harmless and must not be "tidied" by
  // renumbering.
  repeated AllocationRecord allocation_records = 6;

  // These are snapshots of the overall allocator memory stats.
  // The number of live bytes currently allocated by the allocator.
  int64 allocator_bytes_in_use = 5;
}

// Output sizes recorded for a single execution of a graph node.
// NodeExecStats carries a repeated list of these in its `output` field.
//
// NOTE(review): tag 2 is not assigned in this message. If it belonged to a
// field that was removed, it should be protected with `reserved 2;` so it is
// never reused -- confirm against the file history.
message NodeOutput {
  // NOTE(review): presumably the index of the node output this entry
  // describes -- confirm.
  int32 slot = 1;
  // Description of the tensor for this entry; TensorDescription is defined in
  // the imported tensor_description.proto.
  TensorDescription tensor_description = 3;
}

// For memory tracking. Attached to NodeExecStats via its `memory_stats` field.
message MemoryStats {
  // NOTE(review): the two sizes are presumably in bytes, and the ids
  // presumably identify allocations of persistent tensors; neither the unit
  // nor the id space is stated in this file -- confirm.
  int64 temp_memory_size = 1;
  int64 persistent_memory_size = 3;
  repeated int64 persistent_tensor_alloc_ids = 5;

  // Deprecated `device_*` counterparts of the three fields above. They stay
  // declared so tags 2, 4 and 6 cannot be reused with a different meaning.
  // NOTE(review): the non-`device_` fields look like the intended
  // replacements, but this file does not say so explicitly -- confirm.
  int64 device_temp_memory_size = 2 [deprecated = true];
  int64 device_persistent_memory_size = 4 [deprecated = true];
  repeated int64 device_persistent_tensor_alloc_ids = 6 [deprecated = true];
}

// Time/size stats recorded for a single execution of a graph node.
// DeviceStepStats.node_stats holds a list of these.
message NodeExecStats {
  // TODO(tucker): Use some more compact form of node identity than
  // the full string name.  Either all processes should agree on a
  // global id (cost_id?) for each node, or we should use a hash of
  // the name.
  string node_name = 1;
  // Timing fields with a `_micros` suffix.
  // NOTE(review): the `_rel_` fields are presumably offsets measured from
  // all_start_micros rather than absolute times, and the unit is presumably
  // microseconds; neither is spelled out in this file -- confirm.
  int64 all_start_micros = 2;
  int64 op_start_rel_micros = 3;
  int64 op_end_rel_micros = 4;
  int64 all_end_rel_micros = 5;
  // Memory usage entries, one AllocatorMemoryUsed per entry.
  repeated AllocatorMemoryUsed memory = 6;
  // Output entries, one NodeOutput per entry.
  repeated NodeOutput output = 7;
  // NOTE(review): presumably a human-readable label shown for this node in a
  // timeline view -- confirm.
  string timeline_label = 8;
  // NOTE(review): presumably the time at which the node was scheduled, in
  // microseconds -- confirm.
  int64 scheduled_micros = 9;
  // Thread identifier. Same type (uint32) as the key of
  // DeviceStepStats.thread_names.
  uint32 thread_id = 10;
  // AllocationDescription is defined in the imported
  // allocation_description.proto.
  repeated AllocationDescription referenced_tensor = 11;
  MemoryStats memory_stats = 12;
  // `_nanos` fields mirroring the names of the `_micros` fields above (tags
  // 2-5 and 9).
  // NOTE(review): presumably the same quantities at nanosecond resolution;
  // whether producers fill both sets or only one is not stated -- confirm.
  int64 all_start_nanos = 13;
  int64 op_start_rel_nanos = 14;
  int64 op_end_rel_nanos = 15;
  int64 all_end_rel_nanos = 16;
  int64 scheduled_nanos = 17;
}

// Node execution stats grouped under one device string. StepStats.dev_stats
// holds a list of these.
message DeviceStepStats {
  // NOTE(review): presumably the name of the device the stats were collected
  // on -- confirm the expected format with the producer.
  string device = 1;
  // One NodeExecStats entry per recorded node execution.
  repeated NodeExecStats node_stats = 2;
  // Its key is thread id.
  // NOTE(review): the value is presumably the thread's name, and the key
  // presumably matches NodeExecStats.thread_id (both are uint32) -- confirm.
  // Map iteration order is undefined; do not rely on it.
  map<uint32, string> thread_names = 3;
}

// Top-level container: a list of DeviceStepStats entries.
// NOTE(review): presumably one entry per device that took part in a step --
// confirm.
message StepStats {
  repeated DeviceStepStats dev_stats = 1;
}
